Draw samples from Uniform Distribution

In [357]:
# Import necessary Library
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
import seaborn as sns
In [358]:
import random
import numpy as np
random.seed(42)
X = np.random.uniform(0,1,20)
print(X)
#All values are within the given interval:
np.all(X >= 0)

np.all(X < 1)
[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152 0.79172504 0.52889492
 0.56804456 0.92559664 0.07103606 0.0871293  0.0202184  0.83261985
 0.77815675 0.87001215]
Out[358]:
True
In [359]:
# Display the histogram of the samples, along with the probability density function:
import matplotlib.pyplot as plt
count, bins, ignored = plt.hist(X, 15, normed=True)
plt.plot(bins, np.ones_like(bins), linewidth=2, color='r')
plt.show()
C:\Users\mxm5116\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:3: MatplotlibDeprecationWarning: 
The 'normed' kwarg was deprecated in Matplotlib 2.1 and will be removed in 3.1. Use 'density' instead.
  This is separate from the ipykernel package so we can avoid doing imports until

Draw Sample N from the normal gaussian distribution :

In [360]:
random.seed(42)
mu, sigma = 0, 1 # mean and standard deviation
N = np.random.normal(mu, sigma, 20)
print(N)
[ 1.49407907 -0.20515826  0.3130677  -0.85409574 -2.55298982  0.6536186
  0.8644362  -0.74216502  2.26975462 -1.45436567  0.04575852 -0.18718385
  1.53277921  1.46935877  0.15494743  0.37816252 -0.88778575 -1.98079647
 -0.34791215  0.15634897]
In [361]:
#Display the histogram of the samples, along with the probability density function:
import matplotlib.pyplot as plt
count, bins, ignored = plt.hist(N, 30, density=True)
plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) ),linewidth=2, color='r')
plt.show()

a. Generate 20 data pairs (X, Y) using y = sin(2piX) + N

In [362]:
import matplotlib.pyplot as plt
y=np.sin(2*np.pi*X)+N
print(y)
print(X)
plt.scatter(X,y)
plt.xlabel('X')
plt.ylabel('y=sin(2*pi*X)+N')
plt.show()

# Showing X and y values in data frame
import numpy as np
import pandas as pd
df = pd.DataFrame({'x':X, 'y':y})
print(df)
[ 1.1921607  -1.18133389 -0.28867505 -1.13238193 -2.09148471 -0.13996717
  1.24661332 -1.37096605  2.04341941 -0.78574038 -0.92007232 -0.36774027
  1.11814883  1.01871165  0.58660785  0.89867372 -0.76109122 -2.84905465
 -1.33230363 -0.5725674 ]
[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152 0.79172504 0.52889492
 0.56804456 0.92559664 0.07103606 0.0871293  0.0202184  0.83261985
 0.77815675 0.87001215]
           x         y
0   0.548814  1.192161
1   0.715189 -1.181334
2   0.602763 -0.288675
3   0.544883 -1.132382
4   0.423655 -2.091485
5   0.645894 -0.139967
6   0.437587  1.246613
7   0.891773 -1.370966
8   0.963663  2.043419
9   0.383442 -0.785740
10  0.791725 -0.920072
11  0.528895 -0.367740
12  0.568045  1.118149
13  0.925597  1.018712
14  0.071036  0.586608
15  0.087129  0.898674
16  0.020218 -0.761091
17  0.832620 -2.849055
18  0.778157 -1.332304
19  0.870012 -0.572567
In [363]:
# Divide the data into test and training data set (10 pairs of data for test and training data set)

df.train=df[0:10]
print(df.train)
          x         y
0  0.548814  1.192161
1  0.715189 -1.181334
2  0.602763 -0.288675
3  0.544883 -1.132382
4  0.423655 -2.091485
5  0.645894 -0.139967
6  0.437587  1.246613
7  0.891773 -1.370966
8  0.963663  2.043419
9  0.383442 -0.785740
C:\Users\mxm5116\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:3: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access
  This is separate from the ipykernel package so we can avoid doing imports until
In [364]:
X_train = df.train.iloc[:,0].values 
print(X_train)
[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152]
In [365]:
y_train = df.train.iloc[:,1].values 
print(y_train)
[ 1.1921607  -1.18133389 -0.28867505 -1.13238193 -2.09148471 -0.13996717
  1.24661332 -1.37096605  2.04341941 -0.78574038]

b. Using root mean square error, find weights of polynomial regression for order is 0, 1, 3, 9

d. Draw a chart of fit data

Let’s apply a linear regression model to this dataset.

In [366]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
# transforming the data to include another axis
x = X_train[:, np.newaxis]
y = y_train[:, np.newaxis]
model = LinearRegression()
model.fit(x, y)
y_pred = model.predict(x)
In [367]:
plt.scatter(x, y, s=10)
plt.plot(x, y_pred, color='r')
plt.show()

Fitting Polynomial Regression to the dataset for order=0

In [368]:
import operator

import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
In [369]:
np.random.seed(0)
polynomial_features= PolynomialFeatures(degree=0)
x_poly = polynomial_features.fit_transform(x)

model0 = LinearRegression()
model0.fit(x_poly, y)
y_poly_pred = model0.predict(x_poly)

rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
r2 = r2_score(y,y_poly_pred)
print("RMSE:",rmse)
print("R2:",r2)
print('weights:')
print(model0.coef_)
plt.scatter(x, y, s=10)
# sort the values of x before line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(x,y_poly_pred), key=sort_axis)
x, y_poly_pred = zip(*sorted_zip)
plt.plot(x, y_poly_pred, color='m')
plt.title('When order, M=0')
plt.xlabel('X')
plt.ylabel('y=sin(2*pi*X)+N')
plt.show()
RMSE: 1.2717577098315476
R2: 0.0
weights:
[[0.]]

Fitting Polynomial Regression to the dataset for order=1

In [370]:
import operator

import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
In [371]:
np.random.seed(0)
polynomial_features= PolynomialFeatures(degree=1)
x_poly = polynomial_features.fit_transform(x)

model1 = LinearRegression()
model1.fit(x_poly, y)
y_poly_pred = model1.predict(x_poly)

rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
r2 = r2_score(y,y_poly_pred)
print("RMSE:",rmse)
print("R2:",r2)
print(model1.coef_)
plt.scatter(x, y, s=10)
# sort the values of x before line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(x,y_poly_pred), key=sort_axis)
x, y_poly_pred = zip(*sorted_zip)
plt.plot(x, y_poly_pred, color='m')
plt.title('When order, M=1')
plt.xlabel('X')
plt.ylabel('y=sin(2*pi*X)+N')
plt.show()
RMSE: 1.2516610787428275
R2: 0.03135478551851867
[[0.         1.22072834]]

Fitting Polynomial Regression to the dataset for order=3

In [372]:
np.random.seed(0)
polynomial_features= PolynomialFeatures(degree=3)
x_poly = polynomial_features.fit_transform(x)

model3 = LinearRegression()
model3.fit(x_poly, y)
y_poly_pred = model3.predict(x_poly)

rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
r2 = r2_score(y,y_poly_pred)
print("RMSE:",rmse)
print("R2:",r2)
print(model3.coef_)

plt.scatter(x, y, s=10)
# sort the values of x before line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(x,y_poly_pred), key=sort_axis)
x, y_poly_pred = zip(*sorted_zip)
plt.plot(x, y_poly_pred, color='m')
plt.title('When order, M=3')
plt.xlabel('X')
plt.ylabel('y=sin(2*pi*X)+N')
plt.show()
RMSE: 0.9725937278786453
R2: 0.41513696880229156
[[   0.         -248.15464422  387.55479061 -191.01406685]]

Fitting Polynomial Regression to the dataset for order=9

In [373]:
np.random.seed(0)
polynomial_features= PolynomialFeatures(degree=9)
x_poly = polynomial_features.fit_transform(x)

model9 = LinearRegression()
model9.fit(x_poly, y)
y_poly_pred = model9.predict(x_poly)

rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
r2 = r2_score(y,y_poly_pred)
print("RMSE:",rmse)
print("R2:",r2)
print(model9.coef_)

plt.scatter(x, y, s=10)
# sort the values of x before line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(x,y_poly_pred), key=sort_axis)
x, y_poly_pred = zip(*sorted_zip)
plt.plot(x, y_poly_pred, color='m')
plt.title('When order,M =9')
plt.xlabel('X')
plt.ylabel('y=sin(2*pi*X)+N')
plt.show()
RMSE: 2.8577523264996737e-07
R2: 0.9999999999999495
[[ 0.00000000e+00  1.18595884e+08 -8.27948450e+08  3.33303593e+09
  -8.52527882e+09  1.43662652e+10 -1.59475546e+10  1.12439784e+10
  -4.56863257e+09  8.15015547e+08]]

c. Display weights in table

In [374]:
# Print the co-efficents for M=0,1,3,9
print(model0.coef_)
print(model1.coef_)
print(model3.coef_)
print(model9.coef_)
[[0.]]
[[0.         1.22072834]]
[[   0.         -248.15464422  387.55479061 -191.01406685]]
[[ 0.00000000e+00  1.18595884e+08 -8.27948450e+08  3.33303593e+09
  -8.52527882e+09  1.43662652e+10 -1.59475546e+10  1.12439784e+10
  -4.56863257e+09  8.15015547e+08]]
In [375]:
import pandas as pd 

# intialise data of lists. 
data = {'M=0':[0,"","","","","","","","","" ],'M=1':[0,-0.39012766,"","","","","","","",""],'M=3':[0,-0.72768993,2.11420232,-1.986349090,"","","","","",""],'M=9':[ 0,-32.42123625,630.43839794,-4652.54557797,
   16038.92928046, -26616.73893602,16020.44924781,9291.87084083,-16637.65286273,5956.51875661]} 
print(data)
# Creates pandas DataFrame. 
df.w = pd.DataFrame(data, index =['w0', 'w1', 'w2', 'w3', 'w4', 'w5', 'w6', 'w7', 'w8', 'w9']) 
  
# print the data 
df.w 
{'M=0': [0, '', '', '', '', '', '', '', '', ''], 'M=1': [0, -0.39012766, '', '', '', '', '', '', '', ''], 'M=3': [0, -0.72768993, 2.11420232, -1.98634909, '', '', '', '', '', ''], 'M=9': [0, -32.42123625, 630.43839794, -4652.54557797, 16038.92928046, -26616.73893602, 16020.44924781, 9291.87084083, -16637.65286273, 5956.51875661]}
C:\Users\mxm5116\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:8: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access
  
Out[375]:
M=0 M=1 M=3 M=9
w0 0 0 0 0.000000
w1 -0.390128 -0.72769 -32.421236
w2 2.1142 630.438398
w3 -1.98635 -4652.545578
w4 16038.929280
w5 -26616.738936
w6 16020.449248
w7 9291.870841
w8 -16637.652863
w9 5956.518757

e. Draw train error vs test error

In [376]:
# Draw Train error First
for m in range(0,10):
    import operator
    import numpy as np
    import matplotlib.pyplot as plt

    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_squared_error, r2_score
    from sklearn.preprocessing import PolynomialFeatures
    np.random.seed(0)
    polynomial_features= PolynomialFeatures(degree=(m))
    x_poly = polynomial_features.fit_transform(x)

    model0 = LinearRegression()
    model0.fit(x_poly, y)
    y_poly_pred = model0.predict(x_poly)

    rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
    r2 = r2_score(y,y_poly_pred)
    print("RMSE:",rmse)
RMSE: 1.2717577098315476
RMSE: 1.2516610787428275
RMSE: 1.2236087750763132
RMSE: 0.9725937278786453
RMSE: 0.9714749747422678
RMSE: 0.8162029262456223
RMSE: 0.48995687937025406
RMSE: 0.2453508474627798
RMSE: 0.2096583868768381
RMSE: 2.8577523264996737e-07
In [377]:
# Now draw test error
df.test=df[10:19]
print(df.test)
           x         y
10  0.791725 -0.920072
11  0.528895 -0.367740
12  0.568045  1.118149
13  0.925597  1.018712
14  0.071036  0.586608
15  0.087129  0.898674
16  0.020218 -0.761091
17  0.832620 -2.849055
18  0.778157 -1.332304
C:\Users\mxm5116\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access
  
In [378]:
Xt= df.test.iloc[:,1].values 
print(Xt)
[-0.92007232 -0.36774027  1.11814883  1.01871165  0.58660785  0.89867372
 -0.76109122 -2.84905465 -1.33230363]
In [379]:
yt = df.test.iloc[:,1].values 
print(yt)
[-0.92007232 -0.36774027  1.11814883  1.01871165  0.58660785  0.89867372
 -0.76109122 -2.84905465 -1.33230363]
In [380]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
# transforming the data to include another axis
x = Xt[:, np.newaxis]
y = yt[:, np.newaxis]
In [381]:
# Draw Test error Now
for m in range(0,10):
    import operator
    import numpy as np
    import matplotlib.pyplot as plt

    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_squared_error, r2_score
    from sklearn.preprocessing import PolynomialFeatures
    np.random.seed(0)
    polynomial_features= PolynomialFeatures(degree=(m))
    x_poly = polynomial_features.fit_transform(x)

    model0 = LinearRegression()
    model0.fit(x_poly, y)
    y_poly_pred = model0.predict(x_poly)

    rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
    r2 = r2_score(y,y_poly_pred)
    print("RMSE:",rmse)
RMSE: 1.2533118179082696
RMSE: 8.457231124323202e-16
RMSE: 5.091044805060287e-16
RMSE: 1.3001412665426294e-15
RMSE: 1.464484751053976e-15
RMSE: 2.2056710172663047e-15
RMSE: 2.3138030711294517e-14
RMSE: 3.673626791684377e-14
RMSE: 1.0823941433404824e-13
RMSE: 3.2837446224769394e-14

Now plot Train Vs Test Error

In [382]:
import pandas as pd 

# intialise data of lists. 
data1 = {'M':[0,1,2,3,4,5,6,7,8,9],'Train.RMSE':[1.2717577098315476,1.2516610787428275,1.2236087750763132,0.9725937278786453,0.9714749747422678,0.8162029262456223,0.48995687937025406,0.2453508474627798,0.2096583868768381,2.8577523264996737e-07],'Test.RMSE':[1.2533118179082696,8.457231124323202e-16,5.091044805060287e-16,1.3001412665426294e-15,1.464484751053976e-15,2.2056710172663047e-15,2.3138030711294517e-14,3.673626791684377e-14,1.0823941433404824e-13,3.2837446224769394e-14]} 
print(data)
# Creates pandas DataFrame. 
df1 = pd.DataFrame(data1) 
  
# print the data 
df1 
{'M=0': [0, '', '', '', '', '', '', '', '', ''], 'M=1': [0, -0.39012766, '', '', '', '', '', '', '', ''], 'M=3': [0, -0.72768993, 2.11420232, -1.98634909, '', '', '', '', '', ''], 'M=9': [0, -32.42123625, 630.43839794, -4652.54557797, 16038.92928046, -26616.73893602, 16020.44924781, 9291.87084083, -16637.65286273, 5956.51875661]}
Out[382]:
M Train.RMSE Test.RMSE
0 0 1.271758e+00 1.253312e+00
1 1 1.251661e+00 8.457231e-16
2 2 1.223609e+00 5.091045e-16
3 3 9.725937e-01 1.300141e-15
4 4 9.714750e-01 1.464485e-15
5 5 8.162029e-01 2.205671e-15
6 6 4.899569e-01 2.313803e-14
7 7 2.453508e-01 3.673627e-14
8 8 2.096584e-01 1.082394e-13
9 9 2.857752e-07 3.283745e-14
In [383]:
# Plot train Vs Test error
plt.plot( 'M', 'Train.RMSE', data=df1, marker='o', markerfacecolor='red', markersize=12, color='skyblue', linewidth=4)
plt.plot( 'M', 'Test.RMSE', data=df1, marker='o',markerfacecolor='blue',markersize=12, color='olive', linewidth=4)
plt.legend()
plt.ylabel('RMSE')
plt.xlabel('M')
plt.ylim((0,2))
plt.xlim((0,9))
plt.show()

f. Now generate 100 more data (120 data pairs) and fit 9th order model and draw fit

In [384]:
random.seed(42)
X2 = np.random.uniform(0,1,120)
print(X2)
mu, sigma = 0, 1 # mean and standard deviation
N = np.random.normal(mu, sigma, 120)
print(N)
import matplotlib.pyplot as plt
y2=np.sin(2*np.pi*X2)+N
print(y2)
print(X2)
[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152 0.79172504 0.52889492
 0.56804456 0.92559664 0.07103606 0.0871293  0.0202184  0.83261985
 0.77815675 0.87001215 0.97861834 0.79915856 0.46147936 0.78052918
 0.11827443 0.63992102 0.14335329 0.94466892 0.52184832 0.41466194
 0.26455561 0.77423369 0.45615033 0.56843395 0.0187898  0.6176355
 0.61209572 0.616934   0.94374808 0.6818203  0.3595079  0.43703195
 0.6976312  0.06022547 0.66676672 0.67063787 0.21038256 0.1289263
 0.31542835 0.36371077 0.57019677 0.43860151 0.98837384 0.10204481
 0.20887676 0.16130952 0.65310833 0.2532916  0.46631077 0.24442559
 0.15896958 0.11037514 0.65632959 0.13818295 0.19658236 0.36872517
 0.82099323 0.09710128 0.83794491 0.09609841 0.97645947 0.4686512
 0.97676109 0.60484552 0.73926358 0.03918779 0.28280696 0.12019656
 0.2961402  0.11872772 0.31798318 0.41426299 0.0641475  0.69247212
 0.56660145 0.26538949 0.52324805 0.09394051 0.5759465  0.9292962
 0.31856895 0.66741038 0.13179786 0.7163272  0.28940609 0.18319136
 0.58651293 0.02010755 0.82894003 0.00469548 0.67781654 0.27000797
 0.73519402 0.96218855 0.24875314 0.57615733 0.59204193 0.57225191
 0.22308163 0.95274901 0.44712538 0.84640867 0.69947928 0.29743695
 0.81379782 0.39650574 0.8811032  0.58127287 0.88173536 0.69253159]
[ 0.01050002  1.78587049  0.12691209  0.40198936  1.8831507  -1.34775906
 -1.270485    0.96939671 -1.17312341  1.94362119 -0.41361898 -0.74745481
  1.92294203  1.48051479  1.86755896  0.90604466 -0.86122569  1.91006495
 -0.26800337  0.8024564   0.94725197 -0.15501009  0.61407937  0.92220667
  0.37642553 -1.09940079  0.29823817  1.3263859  -0.69456786 -0.14963454
 -0.43515355  1.84926373  0.67229476  0.40746184 -0.76991607  0.53924919
 -0.67433266  0.03183056 -0.63584608  0.67643329  0.57659082 -0.20829876
  0.39600671 -1.09306151 -1.49125759  0.4393917   0.1666735   0.63503144
  2.38314477  0.94447949 -0.91282223  1.11701629 -1.31590741 -0.4615846
 -0.06824161  1.71334272 -0.74475482 -0.82643854 -0.09845252 -0.66347829
  1.12663592 -1.07993151 -1.14746865 -0.43782004 -0.49803245  1.92953205
  0.94942081  0.08755124 -1.22543552  0.84436298 -1.00021535 -1.5447711
  1.18802979  0.31694261  0.92085882  0.31872765  0.85683061 -0.65102559
 -1.03424284  0.68159452 -0.80340966 -0.68954978 -0.4555325   0.01747916
 -0.35399391 -1.37495129 -0.6436184  -2.22340315  0.62523145 -1.60205766
 -1.10438334  0.05216508 -0.739563    1.5430146  -1.29285691  0.26705087
 -0.03928282 -1.1680935   0.52327666 -0.17154633  0.77179055  0.82350415
  2.16323595  1.33652795 -0.36918184 -0.23937918  1.0996596   0.65526373
  0.64013153 -1.61695604 -0.02432612 -0.73803091  0.2799246  -0.09815039
  0.91017891  0.31721822  0.78632796 -0.4664191  -0.94444626 -0.41004969]
[-2.91418350e-01  8.09694871e-01 -4.74830661e-01  1.23703172e-01
  2.34465581e+00 -2.14134482e+00 -8.88307877e-01  3.40595682e-01
 -1.39945862e+00  2.61224649e+00 -1.37944982e+00 -9.28011227e-01
  1.50831165e+00  1.02986767e+00  2.29921938e+00  1.42655586e+00
 -7.34531159e-01  1.04180677e+00 -1.25239485e+00  7.35400218e-02
  8.13310807e-01 -1.10768705e+00  8.53755560e-01 -5.94521242e-02
  1.05302901e+00 -1.86959762e+00  1.08200950e+00  9.85691438e-01
 -8.31414156e-01  3.61234022e-01  5.60667298e-01  8.60833628e-01
  9.44337873e-01 -9.39367918e-03 -6.52130341e-01 -1.34392766e-01
 -1.32184707e+00 -6.38547353e-01 -9.81974462e-01 -2.33204227e-01
  1.34907126e+00  1.77099982e-01 -5.50345339e-01 -7.23620130e-01
 -2.35759714e+00 -4.38838734e-01  1.13585163e+00  1.35936537e+00
  3.29982719e+00  1.69992282e+00 -1.33971987e+00  1.49329642e+00
 -1.38889179e+00  1.36546039e-01  8.98562307e-01  2.56205077e+00
 -1.56509636e+00  1.73347602e-01  1.11645926e-01  3.35908400e-01
  1.96747707e+00 -4.40693129e-01 -1.97921599e+00  3.25365764e-01
  4.46169585e-01  2.66396045e+00  4.72684950e-02  6.60505015e-01
 -2.07661174e+00  1.41214102e+00 -1.14758617e+00 -1.34907197e+00
  1.04253369e+00 -2.95197206e-01 -7.68666860e-02  5.62471399e-01
  1.83566049e+00  3.44212888e-02 -7.59724944e-02  1.36029245e+00
  1.06740096e-01 -1.76527960e-01 -6.33061829e-02 -9.17902936e-01
 -7.60356202e-01 -3.79622616e-01 -7.89171329e-01 -1.66683804e+00
  1.65950189e-01 -2.03183403e+00 -1.95764505e-01 -8.16187305e-01
 -2.90819220e-03  5.65312675e-01 -3.23352476e-01  1.18023314e+00
 -5.56483368e-01 -1.04208989e+00 -3.56219037e-01 -1.42048064e-01
 -1.27110225e-01  1.81561258e+00  1.16756000e+00  1.10118018e+00
  6.30787475e-01 -6.99836791e-01  5.53044611e-01  2.16724960e-01
  1.62586259e+00 -1.90950059e+00  3.01817305e-01 -1.56010418e+00
 -6.70115784e-01  8.57759034e-01 -1.05493728e-02  9.22622414e-01
  1.06850174e-01 -9.55165722e-01 -1.62100444e+00 -1.34556387e+00]
[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152 0.79172504 0.52889492
 0.56804456 0.92559664 0.07103606 0.0871293  0.0202184  0.83261985
 0.77815675 0.87001215 0.97861834 0.79915856 0.46147936 0.78052918
 0.11827443 0.63992102 0.14335329 0.94466892 0.52184832 0.41466194
 0.26455561 0.77423369 0.45615033 0.56843395 0.0187898  0.6176355
 0.61209572 0.616934   0.94374808 0.6818203  0.3595079  0.43703195
 0.6976312  0.06022547 0.66676672 0.67063787 0.21038256 0.1289263
 0.31542835 0.36371077 0.57019677 0.43860151 0.98837384 0.10204481
 0.20887676 0.16130952 0.65310833 0.2532916  0.46631077 0.24442559
 0.15896958 0.11037514 0.65632959 0.13818295 0.19658236 0.36872517
 0.82099323 0.09710128 0.83794491 0.09609841 0.97645947 0.4686512
 0.97676109 0.60484552 0.73926358 0.03918779 0.28280696 0.12019656
 0.2961402  0.11872772 0.31798318 0.41426299 0.0641475  0.69247212
 0.56660145 0.26538949 0.52324805 0.09394051 0.5759465  0.9292962
 0.31856895 0.66741038 0.13179786 0.7163272  0.28940609 0.18319136
 0.58651293 0.02010755 0.82894003 0.00469548 0.67781654 0.27000797
 0.73519402 0.96218855 0.24875314 0.57615733 0.59204193 0.57225191
 0.22308163 0.95274901 0.44712538 0.84640867 0.69947928 0.29743695
 0.81379782 0.39650574 0.8811032  0.58127287 0.88173536 0.69253159]
In [385]:
# Fit 9th order of model and draw fit

np.random.seed(0)
polynomial_features= PolynomialFeatures(degree=9)
x2_poly = polynomial_features.fit_transform(X2.reshape(120,1))

# transforming the data to include another axis
x2 = X2[:, np.newaxis]
y2= y2[:, np.newaxis] 

model = LinearRegression()
model.fit(x2_poly, y2)
y2_poly_pred = model.predict(x2_poly)

rmse = np.sqrt(mean_squared_error(y2,y2_poly_pred))
r2 = r2_score(y2,y2_poly_pred)
print("RMSE:",rmse)
print("R2:",r2)

plt.scatter(x2, y2, s=10)
# sort the values of x before line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(x2,y2_poly_pred), key=sort_axis)
x2, y2_poly_pred = zip(*sorted_zip)
plt.plot(x2, y2_poly_pred, color='m')
plt.title('When order,M =9')
plt.xlabel('X')
plt.ylabel('y=sin(2*pi*X)+N')
plt.show()
RMSE: 0.9496777581296596
R2: 0.3341590040516488

g. Now we will regularize using the sum of weights.

h. Draw chart for lamda is 0, 1, 10, 100, 1000, 1000

In [386]:
import math
import sklearn
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
import operator
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures


  
In [387]:
def regularizeRidge(alpha):
    if alpha < 0: alpha = math.exp(alpha)    
    else:
        print("alpha = ",alpha)
        if alpha != 0: print("ln(alpha) = ", math.log(alpha))
    polynomial_features= PolynomialFeatures(degree=9)
    x_transformed =  polynomial_features.fit_transform(X2.reshape(120,1))
    poly_linear_model = Ridge(alpha = alpha)
    poly_linear_model.fit(x_transformed, y2)
    return poly_linear_model

def chartRidge(alpha): 
    model = regularizeRidge(alpha)
    xx = np.linspace(0, 1, 120)
    x_transformed =polynomial_features.fit_transform(xx.reshape(120,1))
    yy = model.predict(x_transformed)
    plt.plot(xx, yy,label=alpha)
    plt.scatter(X_train, y_train)
    plt.scatter(Xt, yt, c = 'r')
    plt.legend()
In [398]:
# When lambda=0, the chart is below
chartRidge(0)
alpha =  10
ln(alpha) =  2.302585092994046
In [397]:
# When lambda=0.10, the chart is below
chartRidge(0.1)
alpha =  0.1
ln(alpha) =  -2.3025850929940455
In [390]:
# When lambda=0.01, the chart is below
chartRidge(0.01)
alpha =  0.01
ln(alpha) =  -4.605170185988091
In [391]:
# When lambda=0.001, the chart is below
chartRidge(0.001)
alpha =  0.001
ln(alpha) =  -6.907755278982137
In [392]:
# When lambda=0.0001, the chart is below
chartRidge(0.0001)
alpha =  0.0001
ln(alpha) =  -9.210340371976182
In [399]:
# When lambda=10, the chart is below
chartRidge(10)
alpha =  10
ln(alpha) =  2.302585092994046
In [400]:
# When lambda=100, the chart is below
chartRidge(100)
alpha =  100
ln(alpha) =  4.605170185988092
In [401]:
# When lambda=1000, the chart is below
chartRidge(1000)
alpha =  1000
ln(alpha) =  6.907755278982137
In [402]:
# When lambda=10000, the chart is below
chartRidge(10000)
alpha =  10000
ln(alpha) =  9.210340371976184

i. Now draw test and train error according to lamda

In [393]:
from sklearn.metrics import mean_squared_error 
import tensorflow as tf
def getMse(Y, yy):
    standard = tf.square(Y - yy)
    mse = tf.reduce_mean(standard)
    return mse.numpy()


train_error_ridge = np.zeros(30)
test_error_ridge = np.zeros(30)

def getErrorRidge(i:int,  model) :     # A new error function
    xx_transformed_test = polynomial_features.fit_transform(Xt.reshape(Xt.shape[0], 1))
    xx_transformed_train = polynomial_features.fit_transform(X_train.reshape(X_train.shape[0], 1))
    yy_test = model.predict(xx_transformed_test)
    yy_train = model.predict(xx_transformed_train)
    test_error_ridge[i] = getMse(yt, yy_test)
    train_error_ridge[i] = getMse(y_train, yy_train)
In [394]:
xx = list(range(-30, 0))
for i in xx:
    model = regularizeRidge(i)
    getErrorRidge(i, model)
In [395]:
xx = list(range(-30, 0))
plt.plot(xx, test_error_ridge, label = "$test-error$", c = 'y')
plt.plot(xx, train_error_ridge, label = "$train-error$", c = 'r')

plt.xlabel('ln(lamdba)')

plt.ylabel('Error')
plt.legend()
Out[395]:
<matplotlib.legend.Legend at 0x1825eed8cf8>
In [ ]:
 

j. Based on the best test performance, what is your model?

In [396]:
 # Now lets see the best lambda
best_lambda = 0
for i in range(-30,0):
    if test_error_ridge[i+30] == test_error_ridge.min(): best_lambda = i
print("best ln(lambda)based on my analysis = ", best_lambda)
best_lambda_0 = math.exp(best_lambda)
print("best lambda = ", best_lambda_0)
print("In Conclusion of this analysis, the model with input ln(lamdba) = ",best_lambda,", lambda = ",best_lambda_0," will provide best test performance.")
best ln(lambda)based on my analysis =  -2
best lambda =  0.1353352832366127
In Conclusion of this analysis, the model with input ln(lamdba) =  -2 , lambda =  0.1353352832366127  will provide best test performance.